#!/usr/bin/env python3

# Mimics GNU timeout, but has special modes which gather test result data and retry failed tests.

######################################### How this works ##########################################
#
# Because we have several different test harnesses and we don't invoke them directly, communication
# between this script and the harness is done through the simplest means possible (environment
# variables to communicate babysitter->harness, files in standard locations harness->babysitter).
#
# The script supports three different ways of extracting test data from the invoked test suite:
#
# 1. "The babysitter protocol": The babysitter sets five environment variables (see below):
#        "Ran test file": A path to a file where the harness should write a line-delimited list of
#            tests which ran to completion.
#        "Failed test file": A path to a file where the harness should write a line-delimited list
#            of tests that failed.
#        "Current test file": A path to a file where the harness should write the currently running
#            test before a test begins, then delete afterward (used to detect early termination).
#        "Run test": A list of test names, used by:
#        "Run mode": This is either RUN or EXCLUDE. If RUN, the test list is a whitelist; run only
#            those tests. If EXCLUDE, the list is a blacklist; run all except those tests.
#    This is the most featureful mode: It can report where we failed in the case of timeouts or
#    crashes that take down the harness, and if the feature is enabled it can retry failed tests.
#    However, it requires modification to the test harness.
#
# 2. NUnit XML: The babysitter also sets a sixth environment variable:
#        "XML list file": A path to a file where the harness should write a line-delimited list of
#        paths to NUnit-format XML result files it created.
#    This also requires modification to the test harness, but less of it.
#
# 3. NUnit XML (manually specified): If the test harness can't be modified, but the caller of the
#    babysitter script happens to know where the harness writes its result XML files, the caller
#    can specify those paths in the "Extra XML" environment variable (see --help)
#
# A single babysitter invocation can currently handle either the babysitter protocol or the XML,
# but never a mix of the two.
#
###################################################################################################

import argparse
import subprocess
import re
import signal
import time
import sys
import os.path
import copy
import tempfile
import calendar
import json
import platform
from xml.dom.minidom import parse as xmlparse

### Constants

# Filenames

CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
RAN_TEST_FILE     = "babysitter_report_ran_test_file.txt"
FAILED_TEST_FILE  = "babysitter_report_failed_test_file.txt"
XML_LIST_FILE     = "babysitter_report_xml_list_file.txt"
LOGGING_FILE      = "babysitter_report.json_lines"

# Environment keys

# Keys used for Babysitter<->Nunit IPC
CURRENT_TEST_KEY  = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
RAN_TEST_KEY      = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
FAILED_TEST_KEY   = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
XML_LIST_KEY      = 'MONO_BABYSITTER_NUNIT_XML_LIST_FILE'
RUN_KEY           = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
RUN_MODE_KEY      = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or EXCLUDE

# Keys used for script configuration (see --help text)
LOG_FILE_KEY      = 'MONO_BABYSITTER_LOG_FILE'       # Path
EXTRA_XML_KEY     = 'MONO_BABYSITTER_EXTRA_XML'      # Semicolon-separated list of paths
RETRY_KEY         = 'MONO_BABYSITTER_RETRY'          # Equal to an integer
VERBOSE_KEY       = 'MONO_BABYSITTER_VERBOSE'        # "Undocumented"-- used for debugging babysitter

# JSON keys

DATE_JSON       = 'date'          # POSIX timestamp of test suite run
INVOKE_JSON     = 'invocation'
COUNT_JSON      = 'iteration'        # How many times was command executed?
LIMIT_JSON      = 'failure_max'
SUPPORT_JSON    = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
LOADED_XML_JSON = 'loaded_xml'          # True if we loaded result XML from the test suite
FINAL_CODE_JSON = 'final_code'
TESTS_JSON      = 'tests'         # Holds dictionary of (test case name)->(dict with TEST_ keys below)
TEST_FAILURES         = 'normal_failures'
TEST_CRASH_FAILURES   = 'crash_failures'
TEST_TIMEOUT_FAILURES = 'timeout_failures'

### Interpret arguments

scriptname = sys.argv[0]

# This is very silly: This program needs to switch from argparse to manual parsing
# after the second positional argument, but argparse doesn't let you do this.
# I work around this with a custom subclass which can selectively swallow errors:
class Hesitate(Exception):
	pass
class HesitantParser(argparse.ArgumentParser):
	def __init__(s, *args, **kwargs):
		s.hesitating = True  # Errors will be swallowed until this is set false
		argparse.ArgumentParser.__init__(s, *args, **kwargs)
	def error(s, *args, **kwargs):
		if s.hesitating:
			raise Hesitate() # Bail out before errors are printed.
		argparse.ArgumentParser.error(s, *args, **kwargs)

# Define args
argparser = HesitantParser(description="""\
Run a test suite with a timeout.\n
Durations are floating point numbers followed by an optional unit:\n
's' for seconds (the default)
'm' for minutes
'h' for hours
'd' for days\n
supported environment variables:
  %s: File to write logs to (as line-delimited JSON)
  %s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)
  %s: Semicolon-separated list of additional NUnit XMLs to check for errors""" %
		(LOG_FILE_KEY, RETRY_KEY, EXTRA_XML_KEY),
	formatter_class=argparse.RawTextHelpFormatter)
argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
	help="Send this signal to the command on timeout, instead of TERM.")
argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
	help="If process continues running after signal, send KILL after this duration.")
argparser.add_argument('duration',
	help="Time to run before sending signal.")
argparser.add_argument('command', nargs="+", help="Command+args to run.")

# Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
argc = len(sys.argv)
extra_args = []
for limit in range(1,argc+1):
	try:
		if limit == argc: # On the final pass, parse for real
			argparser.hesitating = False
		args = argparser.parse_args(sys.argv[1:limit])
		# If we're still here, parse_args succeeded.
		# The final parsed arg is the command; remaining argv items are its args.
		extra_args = sys.argv[limit:]
		break
	except Hesitate: # Before the final pass, if parse_args fails, skip
		pass

argparser.hesitating = False # Just act like a normal argparser from here

durationre = re.compile(r'(\d+)([smhd]?)')
def parse_duration(duration): # Accept units
	match = durationre.match(duration)
	if not match:
		argparser.error("Could not understand duration %s" % duration)
	time, units = match.group(1), match.group(2)
	time = int(time)
	if units == 'm':
		time *= 60
	elif units == 'h':
		time *= 60*60
	elif units == 'd':
		time *= 60*60*24
	return time

def parse_signal(sig):        # Accept names
	if sig.isdigit():
		return int(sig)
	for k,v in signal.__dict__.items():
		if k == ("SIG%s" % sig):
			return v
	argparser.error("Could not understand signal name %s" % sig)

# Final interpretation of arguments
duration = parse_duration(args.duration)
kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
timeout_signal = parse_signal(args.signal)
command = args.command + extra_args

# If we are running in Cygwin, Python will believe it is a UNIX application but Mono will be Windows.

cygwin = platform.system().startswith("CYGWIN")

def outgoingPath(path):
	if cygwin: # Invoke cygpath and strip newline
		return subprocess.check_output(["cygpath", "-w", path])[:-1].decode(sys.stdin.encoding)
	return path

def incomingPath(path):
	if cygwin:
		return subprocess.check_output(["cygpath", path])[:-1].decode(sys.stdin.encoding)
	return path

# Some of the things we put in global_env are paths. If we're in cygwin, we have to keep separate
# local-use and env (mono use) copies of these keys.
env_source = {}

# Process environment
global_env = copy.deepcopy( os.environ )

verbose = VERBOSE_KEY in global_env
logging = LOG_FILE_KEY in global_env
logfile = global_env[LOG_FILE_KEY] if logging else None
xml_list = global_env[EXTRA_XML_KEY].split(";") if EXTRA_XML_KEY in global_env and global_env[EXTRA_XML_KEY] else []
crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
if babysitting:
	babysitter_dir = tempfile.mkdtemp()
	env_source[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
	env_source[RAN_TEST_KEY]     = os.path.join(babysitter_dir, RAN_TEST_FILE)
	env_source[FAILED_TEST_KEY]  = os.path.join(babysitter_dir, FAILED_TEST_FILE)
	env_source[XML_LIST_KEY]     = os.path.join(babysitter_dir, XML_LIST_FILE)

env_source_keys = [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY, XML_LIST_KEY]
for key in env_source_keys:
	global_env[key] = outgoingPath(env_source[key])

have_unix_process_groups = 'killpg' in os.__dict__
have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__

### Timeout implementation

def wait(proc, duration):
	# TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
	start = time.time()
	while True:
		code = proc.poll()
		if code is not None:
			return code
		if time.time()-start > duration:
			return None
		time.sleep(0.05)

# Popen and send_signal can't be called in their basic forms because we want to
# send signals to all children, not just to the immediately spawned process.
# Unfortunately the way to do this varies by operating system.
def popen(*args, **kwargs):
	if have_unix_process_groups: # Call function on spawn to become process group leader
		kwargs['preexec_fn'] = os.setsid
	elif have_windows_process_groups: # Set magic flag for Windows process groups
		kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
	return subprocess.Popen(*args, **kwargs)

def send_signal(proc, sig):
	if have_unix_process_groups: # UNIX
		# For compatibility with GNU timeout, pre-send the signal to just the monitored process
		os.kill(proc.pid, sig)
		# Send signal to entire group
		try:
			os.killpg(proc.pid, sig)
		except OSError as e:
			sys.stderr.write("%s: Warning, could not kill process group %s because %s\n" % (scriptname, proc.pid, e))
		# For compatibility with GNU Timeout, send a SIGCONT after the signal
		# (so delivery has a chance to occur even for stopped processes)
		if sig != signal.SIGKILL and sig != signal.SIGCONT:
			os.kill(proc.pid, signal.SIGCONT)
	elif have_windows_process_groups: # Windows with Python 2.7 or better
		os.kill(proc.pid, sig) # Becuase CREATE_NEW_PROCESS_GROUP, will go to entire group
	else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
		proc.send_signal(sig) # No way to contact group, just kill process

### Utility functions

def attemptDelete(path):
	try:
		os.remove(path)
	except OSError:
		pass

def attemptLines(path):
	try:
		with open(path) as f:
			return map(lambda s: s.strip('\r\n'), f.readlines())
	except (OSError, IOError):
		return []

def attemptFirstLine(path):
	lines = attemptLines(path)
	if lines:
		return lines[0]
	return None

def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does:
	return calendar.timegm(time.gmtime())

failcount = {}
def failure_may_retry(test):
	if test not in failcount:
		failcount[test] = 0
	failcount[test] += 1
	return failcount[test] < failmax

def verbose_print(arg):
	if (verbose):
		print(arg)

def failure_annotate(test):
	return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax)

def pluralize(lst):
	return "s" if len(list(lst)) > 1 else ""

### Run command

def run(): # Returns exit code
	resume_after = []
	retry_these = []
	ever_completed = False
	died_politely = False
	proc = None
	code = None

	# Set up logging
	log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
		LOADED_XML_JSON:False, INVOKE_JSON: " ".join(command)}

	def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
		if add is not None:
			if key not in target:
				target[key] = 0
			target[key] += add
		else:
			target[key] = set

	def log_test(testname, key, set=None, add=None):   # Call to add test-case-level value to log
		if TESTS_JSON not in log:
			log[TESTS_JSON] = {}
		if testname not in log[TESTS_JSON]:
			log[TESTS_JSON][testname] = {}
		log_value(key, set=set, add=add, target=log[TESTS_JSON][testname])

	# Ready to run tests
	try:
		while True:
			env = copy.copy(global_env)
			if ever_completed:
				retry_next = []
			else: # Persist reported failures list until first non-crash run
				retry_next = retry_these

			log_value(COUNT_JSON, add=1)

			# Prepare environment/filesystem
			if babysitting:
				for key in env_source_keys: # Clear all paths intended for use by mono
					attemptDelete(env_source[key])
				if resume_after:
					env[RUN_KEY] = ";".join(resume_after)
					env[RUN_MODE_KEY] = "EXCLUDE"
				elif retry_these:
					env[RUN_KEY] = ";".join(retry_these)
					env[RUN_MODE_KEY] = "RUN"

			# Run test suite
			try:
				proc = popen(command, env=env)
			except OSError:
				died_politely = True
				sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
				sys.exit(127)

			code = wait(proc, duration)
			timed_out = code is None
			if timed_out:                  # Initial timeout
				sys.stderr.write("%s: Sending %s to `%s`\n" % (scriptname, "SIG" + args.signal, command[0]))
				send_signal(proc, timeout_signal)
				if kill_after is not None: # Kill-after timeout
					code = wait(proc, kill_after)
					if code is None:
						sys.stderr.write("%s: Sending SIGKILL to `%s`\n" % (scriptname, command[0]))
						send_signal(proc, signal.SIGKILL)
						code = wait(proc, 1)
						sys.stderr.write("%s: Command `%s` returned with %s after SIGKILL\n" % (scriptname, command[0], str(code)))
					else:
						sys.stderr.write("%s: Command `%s` returned with %s after %s\n" % (scriptname, command[0], str(code), "SIG" + args.signal))
				code = proc.wait()         # Waits forever
				sys.stderr.write("%s: Command `%s` timed out with %s\n" % (scriptname, command[0], str(code)))
			died_politely = True

			# The test suite has now run, and what happens next varies:
			# 1. The suite either completed fully without failures, or timed out: Just quit.
			# 2. The suite crashed (halted without completing):
			#   Remember any failures for later and rerun, using a blacklist of testcases we have completed.
			# 3. The suite completed, but there were failures reported:
			#   Rerun, using a whitelist of only reported-failed testcases.
			# 4. The suite crashed partway through a run with a whitelist:
			#   Rerun, using a whitelist consisting of the previous whitelist minus successful testcases.

			crashed_at = attemptFirstLine(env_source[CURRENT_TEST_KEY])
			failed_tests = attemptLines(env_source[FAILED_TEST_KEY])
			ran_tests = attemptLines(env_source[RAN_TEST_KEY])
			wrote_xml = attemptLines(env_source[XML_LIST_KEY])
			bailout = False

			if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol
				log_value(SUPPORT_JSON, True)

			if not crashed_at and not ever_completed:  # The resume_after whitelist is only
				resume_after = []                      # used before the first noncrashing run
				ever_completed = True

			if timed_out:       # Currently no retries after timeout
				bailout = True
				code = 124      # See GNU timeout manpage

			if code or crashed_at: # Process failures
				# Handle crash failures
				if crashed_at and not timed_out:
					log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
					if not crash_resuming:
						bailout = True

					if failure_may_retry(crashed_at):
						if ever_completed:           # Rerun with whitelist next time
							for test in retry_these: # Prepopulate with last whitelist minus run testcases
								if test == crashed_at or test not in ran_tests: # (plus crashed testcase)
									retry_next.append(test)
						else:                            # Rerun with blacklist next time
							for test in ran_tests:       # Add testcases we just ran to blacklist
								if test != crashed_at:   # (except for the crashed testcase)
									resume_after.append(test)
					else:
						bailout = True

				# Handle reported failures
				for test in failed_tests:
					log_test(test, TEST_FAILURES, add=1)
					if failure_may_retry(test):
						retry_next.append(test)
					else:
						bailout = True

				# Report human-readable failures for stdout log.
				message = "%s:" % (scriptname)
				if timed_out:
					message += " Saw timeout in test case %s (never allowed)." % (crashed_at)
				elif crashed_at:
					message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
				if failed_tests:
					message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
				if not (timed_out or crashed_at or failed_tests):
					message += " Test suite terminated with code %d, " % (code)
					if log[SUPPORT_JSON]:
						message += "but failure did not occur during a test case. Halting."
					elif xml_list or wrote_xml:
						message += "will extract test results from XML. Halting."
					else:
						message += "and suite cannot report test case data. Halting."
				elif bailout:
					message += " Will halt testing."
				print(message)

			if not log[SUPPORT_JSON]:
				for xml in (xml_list + [incomingPath(xml) for xml in wrote_xml]):
					verbose_print("Will attempt to load XML from %s" % (xml))
					try:
						data = xmlparse(xml).documentElement
						if data.nodeName != 'test-results':
							raise ValueError("Toplevel element was not <test-results />")

						log_value(LOADED_XML_JSON, True)

						search = [data]
						while search:
							nextsearch = []
							for node in search:
								for child in node.childNodes:
									if child.nodeName == 'test-suite' or child.nodeName == 'results':
										nextsearch.append(child) # Descend
									elif child.nodeName == 'test-case':
										name = child.getAttribute("name")
										if child.getAttribute("executed") == "True" and child.getAttribute("success") != "True":
											log_test(name, TEST_FAILURES, add=1)

							search = nextsearch
					except Exception as e:
						print("Could not load XML file %s. Reason: %s" % (xml, e))
					data

			if bailout or not (resume_after or retry_next): # If not retrying
				return code

			# If we got here, a retry is occurring
			retry_these = retry_next

			# Report human-readable retry notice for stdout log.
			message = "%s: Will rerun test suite" % (scriptname)
			if log[COUNT_JSON] > 1:
				message += " (iteration #%d)" % (log[COUNT_JSON])
			if resume_after:
				message += ", resuming at crashed testcase %s." % (crashed_at)
			else:
				message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these))
			print(message)
	finally:
		# Emergency: Ensure command does not outlive this script
		if proc is not None and not died_politely:
			send_signal(proc, signal.SIGKILL)

		# Write out logs
		log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)
		if logging:
			with open(logfile, "a") as f:
				f.write(json.dumps(log) + os.linesep)

sys.exit( run() )
